1 Introduction

The purpose of this notebook is to find subclusters and annotate CD4 T cells with the feedback we received from the annotation team.

1.1 Load packages

library(Seurat)
library(tidyverse)

1.2 Parameters

# Paths
path_to_obj <- here::here("scRNA-seq/results/R_objects/level_3/CD4_T/CD4_T_clustered_level_3_with_pre_freeze.rds")
path_to_save <- here::here("scRNA-seq/results/R_objects/level_3/CD4_T/CD4_T_annotated_level_3.rds")
path_to_save_df <- here::here("scRNA-seq/3-clustering/3-level_3/tmp/CD4_T/CD4_T_annotation_level_3_df.rds")


# Colors
color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", "#5A5156", "#AA0DFE",   
                    "#F8A19F", "#F7E1A0", "#1C8356", "#FEAF16", "#822E1C", "#C4451C",   
                    "#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", "#FBE426", "#16FF32", 
                    "black",   "#3283FE", "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",   
                    "#90AD1C", "#FE00FA", "#85660D", "#3B00FB", "#822E1C", "coral2", 
                    "#1CFFCE", "#1CBE4F", "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",   
                    "#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD", "#FEAF16", "#683B79",   
                    "#B10DA1", "#1C7F93", "#F8A19F", "dark orange", "#FEAF16", "#FBE426",  
                    "Brown")

# Functions
source(here::here("scRNA-seq/bin/utils.R"))


# Additional function
plot_subcluster <- function(seurat_obj, pattern) {
  p <- seurat_obj@reductions$umap@cell.embeddings %>%
    as.data.frame() %>%
    mutate(cluster = seurat_obj$annotation_level_3) %>%
    filter(str_detect(cluster, pattern)) %>%
    ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
      geom_point(size = 0.1) +
      theme_classic()
  p
}

1.3 Load data

# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 37378 features across 78974 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, umap, harmony
DimPlot(seurat, cols = color_palette, pt.size = 0.2)

2 Find Subclusters

2.1 Cluster 1

seurat$annotation_level_3 <- seurat$seurat_clusters
Idents(seurat) <- "annotation_level_3"
seurat <- FindSubCluster(
  seurat,
  cluster = "1",
  graph.name = "RNA_nn",
  resolution = 0.25,
  subcluster.name = "annotation_level_3"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11263
## Number of edges: 102967
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7959
## Number of communities: 20
## Elapsed time: 0 seconds
Idents(seurat) <- "annotation_level_3"
DimPlot(seurat, cols = color_palette, pt.size = 0.2)

plot_subcluster(seurat, "^1_")

Markers:

clusters_1 <- seurat$annotation_level_3 %>%
  unique() %>%
  str_subset("^1_") %>%
  sort()
markers_1 <- purrr::map(clusters_1, function(x) {
  group_1 <- clusters_1[which(clusters_1 == x)]
  group_2 <- clusters_1[which(clusters_1 != x)]
  df <- FindMarkers(
    seurat,
    ident.1 = group_1,
    ident.2 = group_2,
    only.pos = TRUE,
    logfc.threshold = 0.5,
    verbose = TRUE
  )
  df <- df %>%
    rownames_to_column(var = "gene") %>%
    arrange(desc(avg_log2FC))
  df
})
names(markers_1) <- clusters_1
DT::datatable(markers_1$`1_0`)
DT::datatable(markers_1$`1_1`)

2.2 Cluster 5

seurat <- FindSubCluster(
  seurat,
  cluster = "5",
  graph.name = "RNA_snn",
  resolution = 0.3,
  subcluster.name = "annotation_level_3"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5335
## Number of edges: 163910
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8391
## Number of communities: 4
## Elapsed time: 0 seconds
Idents(seurat) <- "annotation_level_3"
DimPlot(seurat, cols = color_palette, pt.size = 0.2)

plot_subcluster(seurat, "^5_")

Markers:

clusters_5 <- seurat$annotation_level_3 %>%
  unique() %>%
  str_subset("^5_") %>%
  sort()
markers_5 <- purrr::map(clusters_5, function(x) {
  group_1 <- clusters_5[which(clusters_5 == x)]
  group_2 <- clusters_5[which(clusters_5 != x)]
  df <- FindMarkers(
    seurat,
    ident.1 = group_1,
    ident.2 = group_2,
    only.pos = TRUE,
    logfc.threshold = 0.5,
    verbose = TRUE
  )
  df <- df %>%
    rownames_to_column(var = "gene") %>%
    arrange(desc(avg_log2FC))
  df
})
names(markers_5) <- clusters_5
DT::datatable(markers_5$`5_0`)
DT::datatable(markers_5$`5_1`)
DT::datatable(markers_5$`5_2`)
DT::datatable(markers_5$`5_3`)

2.3 Cluster 9

seurat <- FindSubCluster(
  seurat,
  cluster = "9",
  graph.name = "RNA_snn",
  resolution = 0.15,
  subcluster.name = "annotation_level_3"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3832
## Number of edges: 100622
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9064
## Number of communities: 4
## Elapsed time: 0 seconds
Idents(seurat) <- "annotation_level_3"
DimPlot(seurat, cols = color_palette, pt.size = 0.2)

plot_subcluster(seurat, "^9_")

Markers:

clusters_9 <- seurat$annotation_level_3 %>%
  unique() %>%
  str_subset("^9_") %>%
  sort()
markers_9 <- purrr::map(clusters_9, function(x) {
  group_1 <- clusters_9[which(clusters_9 == x)]
  group_2 <- clusters_9[which(clusters_9 != x)]
  print(group_1)
  print(group_2)
  df <- FindMarkers(
    seurat,
    ident.1 = group_1,
    ident.2 = group_2,
    only.pos = TRUE,
    logfc.threshold = 0.5,
    verbose = TRUE
  )
  df <- df %>%
    rownames_to_column(var = "gene") %>%
    arrange(desc(avg_log2FC))
  df
})
## [1] "9_0"
## [1] "9_1" "9_2" "9_3"
## [1] "9_1"
## [1] "9_0" "9_2" "9_3"
## [1] "9_2"
## [1] "9_0" "9_1" "9_3"
## [1] "9_3"
## [1] "9_0" "9_1" "9_2"
names(markers_9) <- clusters_9
DT::datatable(markers_9$`9_0`)
DT::datatable(markers_9$`9_1`)
DT::datatable(markers_9$`9_2`)
DT::datatable(markers_9$`9_3`)

2.4 Cluster 11

seurat <- FindSubCluster(
  seurat,
  cluster = "11",
  graph.name = "RNA_nn",
  resolution = 0.25,
  subcluster.name = "annotation_level_3"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2230
## Number of edges: 22679
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7868
## Number of communities: 4
## Elapsed time: 0 seconds
Idents(seurat) <- "annotation_level_3"
DimPlot(seurat, cols = color_palette, pt.size = 0.2)

plot_subcluster(seurat, "^11_")

Markers:

clusters_11 <- seurat$annotation_level_3 %>%
  unique() %>%
  str_subset("^11_") %>%
  sort()
markers_11 <- purrr::map(clusters_11, function(x) {
  group_1 <- clusters_11[which(clusters_11 == x)]
  group_2 <- clusters_11[which(clusters_11 != x)]
  df <- FindMarkers(
    seurat,
    ident.1 = group_1,
    ident.2 = group_2,
    only.pos = TRUE,
    logfc.threshold = 0.5,
    verbose = TRUE
  )
  df <- df %>%
    rownames_to_column(var = "gene") %>%
    arrange(desc(avg_log2FC))
  df
})
names(markers_11) <- clusters_11
DT::datatable(markers_11$`11_0`)
DT::datatable(markers_11$`11_1`)

3 Annotation

annotation_level_3 <- c(
  "0" = "Naive",
  "1_0" = "Follicular Th CXCL13+CBLB+",
  "1_1" = "Follicular Th TOX2+",
  "2" = "Follicular Th CXCR5+",
  "3" = "Mitochondrial+ T cells",
  "4" = "Central Mem PASK-",
  "5_0" = "IL2RA+FOXP3+ Treg",
  "5_1" = "Th17",
  "5_2" = "Memory T cells",
  "5_3" = "Doublets",
  "6" = "CD8",
  "7" = "Central Mem PASK+",
  "8" = "Doublets",
  "9_0" = "CD200+TOX+",
  "9_1" = "Naive",
  "9_2" = "metabolic/poor-quality",
  "9_3" = "activated CD4 T",
  "10" = "CD4 Non-Tfh KLRB1+ ",
  "11_0" = "naive Treg IKZF2+",
  "11_1" = "Treg IKZF2+HPGD+",
  "12" = "Proliferative",
  "13" = "Proliferative",
  "14" = "poor-quality",
  "15" = "poor-quality"
)
seurat$annotation_level_3 <- annotation_level_3[seurat$annotation_level_3]
DimPlot(
  seurat,
  group.by = "annotation_level_3",
  cols = color_palette,
  pt.size = 0.2
)

4 Save

saveRDS(seurat, path_to_save)
seurat$cell_barcode <- colnames(seurat)
df <- seurat@meta.data[, c("cell_barcode", "annotation_level_3")]
saveRDS(df, path_to_save_df)

5 Session Information

sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
## 
## Matrix products: default
## BLAS:   /apps/R/4.0.1/lib64/R/lib/libRblas.so
## LAPACK: /apps/R/4.0.1/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1      stringr_1.4.0      dplyr_1.0.4        purrr_0.3.4        readr_1.4.0        tidyr_1.1.2        tibble_3.0.6       ggplot2_3.3.3      tidyverse_1.3.0    SeuratObject_4.0.0 Seurat_4.0.0       BiocStyle_2.16.1  
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1         backports_1.2.1      plyr_1.8.6           igraph_1.2.6         lazyeval_0.2.2       splines_4.0.1        crosstalk_1.1.1      listenv_0.8.0        scattermore_0.7      digest_0.6.27        htmltools_0.5.1.1    fansi_0.4.2          magrittr_2.0.1       tensor_1.5           cluster_2.1.0        ROCR_1.0-11          globals_0.14.0       modelr_0.1.8         matrixStats_0.58.0   colorspace_2.0-0     rvest_0.3.6          ggrepel_0.9.1        haven_2.3.1          xfun_0.21            crayon_1.4.1         jsonlite_1.7.2       spatstat_1.64-1      spatstat.data_2.0-0  survival_3.2-3       zoo_1.8-8            glue_1.4.2           polyclip_1.10-0      gtable_0.3.0         leiden_0.3.7         future.apply_1.7.0   abind_1.4-5          scales_1.1.1         DBI_1.1.1            miniUI_0.1.1.1       Rcpp_1.0.6           viridisLite_0.3.0    xtable_1.8-4         reticulate_1.18      DT_0.13              htmlwidgets_1.5.3    httr_1.4.2           RColorBrewer_1.1-2   ellipsis_0.3.1       ica_1.0-2            pkgconfig_2.0.3      farver_2.0.3         sass_0.3.1           uwot_0.1.10          dbplyr_2.1.0         deldir_0.2-10        utf8_1.1.4          
##  [57] here_1.0.1           tidyselect_1.1.0     labeling_0.4.2       rlang_0.4.10         reshape2_1.4.4       later_1.1.0.1        munsell_0.5.0        cellranger_1.1.0     tools_4.0.1          cli_2.3.1            generics_0.1.0       broom_0.7.4          ggridges_0.5.3       evaluate_0.14        fastmap_1.1.0        yaml_2.2.1           goftest_1.2-2        knitr_1.31           fs_1.5.0             fitdistrplus_1.1-3   RANN_2.6.1           pbapply_1.4-3        future_1.21.0        nlme_3.1-148         mime_0.10            xml2_1.3.2           compiler_4.0.1       rstudioapi_0.13      plotly_4.9.3         png_0.1-7            spatstat.utils_2.0-0 reprex_1.0.0         bslib_0.2.4          stringi_1.5.3        highr_0.8            ps_1.5.0             lattice_0.20-41      Matrix_1.2-18        vctrs_0.3.6          pillar_1.5.0         lifecycle_1.0.0      BiocManager_1.30.10  lmtest_0.9-38        jquerylib_0.1.3      RcppAnnoy_0.0.18     data.table_1.14.0    cowplot_1.1.1        irlba_2.3.3          httpuv_1.5.5         patchwork_1.1.1      R6_2.5.0             bookdown_0.21        promises_1.2.0.1     KernSmooth_2.23-17   gridExtra_2.3        parallelly_1.23.0   
## [113] codetools_0.2-16     MASS_7.3-51.6        assertthat_0.2.1     rprojroot_2.0.2      withr_2.4.1          sctransform_0.3.2    mgcv_1.8-31          parallel_4.0.1       hms_1.0.0            grid_4.0.1           rpart_4.1-15         rmarkdown_2.7        Rtsne_0.15           shiny_1.6.0          lubridate_1.7.9.2